# ANALYSIS SETTINGS
# dep    : name of the column copied into the outcome `y`.
# run    : name of the date column the running variable `x` is built from.
# cutoff : cutpoint handed to rdd_data().
# order  : not referenced in the visible part of the script
#          (presumably a polynomial order used further down -- confirm).
dep <- "partyUNION"
run <- "BDATE01"
cutoff <- 0
order <- 1

# Covariates that must be non-missing for a baseline respondent and that are
# passed to rdd_data() as `covar`.
contVarBase <- c(
  "age", "married", "nchild", "nodegree",
  "income", "workexperience", "satishhinc"
)

# LOAD DATA
# load() restores the objects stored in the .RData file into the workspace;
# the pipeline below expects a data frame called `df` to be available.
load('data/TEMPORARY.RData')

# Restrict to rows with female == 1 whose BDATE01 falls in the calendar years
# 1972-2012, then derive the analysis variables:
#   workexperience : full-time plus part-time experience
#   nodegree       : educCat_0 + educCat_1
#   income         : replaced by log(income + 1)
#   y              : the outcome column named by `dep`
#   x              : whole months from the `run` date up to 1992-01-01, minus 1
#   x_original     : the same month count without the -1 shift
# The month count is positive for dates before 1992-01-01.
df_dur <- df %>%
  subset(female == 1) %>%
  subset(lubridate::year(BDATE01) %in% 1972:2012) %>%
  mutate(
    workexperience = ftexperience + ptexperience,
    nodegree = educCat_0 + educCat_1,
    income = log(income + 1),
    y = .data[[dep]],
    # Coerce the month count explicitly before shifting. The earlier form
    # `... - 1 %>% as.numeric()` only piped the literal 1, because `%>%`
    # binds tighter than `-`.
    x = as.numeric(
      interval(as.Date(.data[[run]]), as.Date('1992-01-01')) %/% months(1)
    ) - 1,
    # Whole-number shift, so adding 1 back recovers the unshifted count
    # exactly; defined after `x` to keep the original column order.
    x_original = x + 1
  )
  
# SELECT BASELINE RESPONDENTS
# A respondent qualifies when at least two of their 2012-2014 rows are
# complete on x, y, x_original, pid, hid and every baseline covariate.
# The result is the sorted vector of distinct qualifying pids.
pid_sel <- df_dur %>%
  subset(syear %in% 2012:2014) %>%
  select(x, y, x_original, pid, hid, all_of(contVarBase)) %>%
  subset(complete.cases(.)) %>%
  group_by(pid) %>%
  filter(n() >= 2) %>%
  ungroup() %>%
  pull(pid) %>%
  unique() %>%
  sort()

# CREATE BASELINE DATA
# Rows of the selected respondents from the 2012-2014 waves. All columns of
# df_dur are kept, so rows with missing covariates can still be present.
df <- subset(df_dur, syear %in% 2012:2014 & pid %in% pid_sel)

# CREATE DATA FOR MCCRARY TEST
# One row per respondent: the first row of each pid in the current row order
# of `df` (no explicit sort is applied beforehand).
df_mccrary <- df %>%
  group_by(pid) %>%
  slice_head(n = 1) %>%
  ungroup()

# Uses the unshifted running variable x_original.
dat_temp_mccrary <- rdd_data(
  y = df_mccrary$y,
  x = df_mccrary$x_original,
  covar = df_mccrary[, contVarBase],
  cutpoint = cutoff
)

# CREATE BASELINE RDD DATA
# All baseline person-year rows, with the shifted running variable x.
dat_temp <- rdd_data(
  y = df$y,
  x = df$x,
  covar = df[, contVarBase],
  cutpoint = cutoff
)

# BANDWIDTH SELECTION
# IK bandwidth computed from the rdd_data object.
bw <- rdd_bw_ik(dat_temp)

# CCT bandwidth from rdrobust::rdbwselect(). Columns 1-2 of dat_temp are
# dropped so that only the covariate columns are passed as `covs`. The
# cutpoint comes from `cutoff`, so it always matches the one given to
# rdd_data(). Only the first entry of the returned `bws` is kept.
bw_cct <- rdbwselect(
  y = dat_temp$y,
  x = dat_temp$x,
  covs = dat_temp[, -c(1:2)],
  c = cutoff,
  vce = 'hc1'
)[["bws"]][1]

# SHARED SUBGROUP INDICATORS
# Appends the six 0/1 indicator columns that the baseline and the turnout
# data have in common. `data` must contain age, nchild, married and income2.
# A missing input value yields NA in the corresponding indicator.
# Returns `data` with the new columns added at the end.
add_subgroup_dummies <- function(data) {
  data %>%
    mutate(
      age_olderthan47 = as.numeric(age >= 48),
      nchild_lessthan2 = as.numeric(nchild <= 1),
      nchild_morethan1 = as.numeric(nchild > 1),
      married_married = as.numeric(married == 1),
      married_notmarried = as.numeric(married == 0),
      income2_lessthan850 = as.numeric(income2 <= 850)
    )
}

# CREATE MEDIATOR VARIABLES
# Baseline data additionally gets the political-interest indicators. Because
# they use %in%, a missing polintrest value is coded 0 in both indicators.
# NOTE(review): the source column is spelled `polintrest` -- presumably the
# name used in the data; verify.
df <- df %>%
  add_subgroup_dummies() %>%
  mutate(
    polinterest_high = as.numeric(polintrest %in% c(1, 2)),
    polinterest_low = as.numeric(polintrest %in% c(3, 4))
  )

# CREATE TURNOUT DATA
# Built from df_dur (all survey years), with the shared indicators only.
df_turnout <- add_subgroup_dummies(df_dur)
